Load Python Libraries Here¶

In [61]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
import tensorflow
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import numpy as np 
import pandas as pd 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.utils import gen_batches
import networkx as nx
import matplotlib.pyplot as plt
import re
import numpy as np
from IPython.display import display
from matplotlib import pyplot as plt
from wordcloud import WordCloud
import seaborn as sns
import json

Read Dataset¶

In [2]:
# Importing data set
df = pd.read_csv("recipes_data.csv")
df.head(5)
Out[2]:
title ingredients directions link source NER site
0 No-Bake Nut Cookies ["1 c. firmly packed brown sugar", "1/2 c. eva... ["In a heavy 2-quart saucepan, mix brown sugar... www.cookbooks.com/Recipe-Details.aspx?id=44874 Gathered ["bite size shredded rice biscuits", "vanilla"... www.cookbooks.com
1 Jewell Ball'S Chicken ["1 small jar chipped beef, cut up", "4 boned ... ["Place chipped beef on bottom of baking dish.... www.cookbooks.com/Recipe-Details.aspx?id=699419 Gathered ["cream of mushroom soup", "beef", "sour cream... www.cookbooks.com
2 Creamy Corn ["2 (16 oz.) pkg. frozen corn", "1 (8 oz.) pkg... ["In a slow cooker, combine all ingredients. C... www.cookbooks.com/Recipe-Details.aspx?id=10570 Gathered ["frozen corn", "pepper", "cream cheese", "gar... www.cookbooks.com
3 Chicken Funny ["1 large whole chicken", "2 (10 1/2 oz.) cans... ["Boil and debone chicken.", "Put bite size pi... www.cookbooks.com/Recipe-Details.aspx?id=897570 Gathered ["chicken gravy", "cream of mushroom soup", "c... www.cookbooks.com
4 Reeses Cups(Candy) ["1 c. peanut butter", "3/4 c. graham cracker ... ["Combine first four ingredients and press in ... www.cookbooks.com/Recipe-Details.aspx?id=659239 Gathered ["graham cracker crumbs", "powdered sugar", "p... www.cookbooks.com

What fields are in the data?¶

In [3]:
df.columns.tolist()
Out[3]:
['title', 'ingredients', 'directions', 'link', 'source', 'NER', 'site']

How do fields in the data relate to each other?¶

All the fields are related to recipes:-

  • title is the name of recipe.
  • Each recipe consist of ingredients
  • and to make recipe we need instructions or directions.
  • Link shows the details related to recipe online.
  • Source shows the origin of recipe from where we get this recipe.
  • NER is the ingredient names without quantities and units
  • site is the URL.

</span>

Convert NER column JSON to List datatype¶

In [4]:
# Converting from json to array and then to numpy array
df["NER"] = df["NER"].apply(lambda x: np.array(json.loads(x)))

Make Feature No_of_Steps_to_make_recipe¶

  • The total steps required to make a recipe
In [5]:
# Assuming your DataFrame is named "df"
df['directions'] = df['directions'].apply(eval)  # Convert directions column to list type
In [6]:
df['No_of_Steps_to_make_recipe'] = df['directions'].apply(lambda x: len(x))
df.head(5)
Out[6]:
title ingredients directions link source NER site No_of_Steps_to_make_recipe
0 No-Bake Nut Cookies ["1 c. firmly packed brown sugar", "1/2 c. eva... [In a heavy 2-quart saucepan, mix brown sugar,... www.cookbooks.com/Recipe-Details.aspx?id=44874 Gathered [bite size shredded rice biscuits, vanilla, br... www.cookbooks.com 6
1 Jewell Ball'S Chicken ["1 small jar chipped beef, cut up", "4 boned ... [Place chipped beef on bottom of baking dish.,... www.cookbooks.com/Recipe-Details.aspx?id=699419 Gathered [cream of mushroom soup, beef, sour cream, chi... www.cookbooks.com 3
2 Creamy Corn ["2 (16 oz.) pkg. frozen corn", "1 (8 oz.) pkg... [In a slow cooker, combine all ingredients. Co... www.cookbooks.com/Recipe-Details.aspx?id=10570 Gathered [frozen corn, pepper, cream cheese, garlic pow... www.cookbooks.com 1
3 Chicken Funny ["1 large whole chicken", "2 (10 1/2 oz.) cans... [Boil and debone chicken., Put bite size piece... www.cookbooks.com/Recipe-Details.aspx?id=897570 Gathered [chicken gravy, cream of mushroom soup, chicke... www.cookbooks.com 6
4 Reeses Cups(Candy) ["1 c. peanut butter", "3/4 c. graham cracker ... [Combine first four ingredients and press in 1... www.cookbooks.com/Recipe-Details.aspx?id=659239 Gathered [graham cracker crumbs, powdered sugar, peanut... www.cookbooks.com 3

Make Feature No_of_ingredients_in_recipe¶

  • The total number of ingredients required to make a recipe
In [7]:
df['ingredients'] = df['ingredients'].apply(eval)  # Convert directions column to list type
In [8]:
df['No_of_ingredients_in_recipe'] = df['ingredients'].apply(lambda x: len(x))
df.head(5)
Out[8]:
title ingredients directions link source NER site No_of_Steps_to_make_recipe No_of_ingredients_in_recipe
0 No-Bake Nut Cookies [1 c. firmly packed brown sugar, 1/2 c. evapor... [In a heavy 2-quart saucepan, mix brown sugar,... www.cookbooks.com/Recipe-Details.aspx?id=44874 Gathered [bite size shredded rice biscuits, vanilla, br... www.cookbooks.com 6 6
1 Jewell Ball'S Chicken [1 small jar chipped beef, cut up, 4 boned chi... [Place chipped beef on bottom of baking dish.,... www.cookbooks.com/Recipe-Details.aspx?id=699419 Gathered [cream of mushroom soup, beef, sour cream, chi... www.cookbooks.com 3 4
2 Creamy Corn [2 (16 oz.) pkg. frozen corn, 1 (8 oz.) pkg. c... [In a slow cooker, combine all ingredients. Co... www.cookbooks.com/Recipe-Details.aspx?id=10570 Gathered [frozen corn, pepper, cream cheese, garlic pow... www.cookbooks.com 1 6
3 Chicken Funny [1 large whole chicken, 2 (10 1/2 oz.) cans ch... [Boil and debone chicken., Put bite size piece... www.cookbooks.com/Recipe-Details.aspx?id=897570 Gathered [chicken gravy, cream of mushroom soup, chicke... www.cookbooks.com 6 5
4 Reeses Cups(Candy) [1 c. peanut butter, 3/4 c. graham cracker cru... [Combine first four ingredients and press in 1... www.cookbooks.com/Recipe-Details.aspx?id=659239 Gathered [graham cracker crumbs, powdered sugar, peanut... www.cookbooks.com 3 5

Sort Data by Number of Ingredeints¶

In [9]:
df.sort_values(by='No_of_ingredients_in_recipe')
Out[9]:
title ingredients directions link source NER site No_of_Steps_to_make_recipe No_of_ingredients_in_recipe
1335130 Roasted Garlic & Dill Dressing [1 bulb of garlic 1cup buttermilk 1/2 cup mayo... [cut top off garlic, drizzle small amount of o... www.epicurious.com/recipes/member/views/roaste... Gathered [garlic] www.epicurious.com 1 1
57305 Cottage Cheese [any amount sour milk or fresh clabber] [Heat sour milk to a temperature of about 98° ... www.cookbooks.com/Recipe-Details.aspx?id=651009 Gathered [sour milk] www.cookbooks.com 4 1
2223401 Peg's Strawberry Tarts Recipe [1 quart. strawberries] [CREAM FILLING: Beat all ingredients together ... cookeatshare.com/recipes/peg-s-strawberry-tart... Recipes1M [strawberries] cookeatshare.com 6 1
1330070 Brazilian Black Bean Soup [ingredients] [Rinse the beans. Cover them with water, and l... www.epicurious.com/recipes/member/views/brazil... Gathered [ingredients] www.epicurious.com 11 1
1933284 Bubble Gum Ice Tea Recipe [2 ounce Bubble Gum Schnapps fill ice tea] [Ice // rocks] cookeatshare.com/recipes/bubble-gum-ice-tea-94464 Recipes1M [fill ice tea] cookeatshare.com 1 1
... ... ... ... ... ... ... ... ... ...
1997023 D.I.Y Spice Blends (No3) [1/2 cup dry mustard powder, 1/2 cup ground tu... [Mix the ingredients together well, then trans... cookpad.com/us/recipes/367445-diy-spice-blends... Recipes1M [sugar, brown sugar, anise, ground fennel seed... cookpad.com 40 99
719917 Dumplings And Cabbage [1 c. flour, 1 egg, milk, dash of salt, 1/2 c.... [If you do not have large potatoes, use more o... www.cookbooks.com/Recipe-Details.aspx?id=301825 Gathered [bread, sugar, brown sugar, fresh mushrooms, r... www.cookbooks.com 5 223
27310 Strawberry Butter [1 c. powdered sugar, 1 egg yolk, 1/2 c. butte... [My daughter made this soup, and I thought it ... www.cookbooks.com/Recipe-Details.aspx?id=301676 Gathered [bisquick, bread, sugar, noodles, liquid smoke... www.cookbooks.com 1 285
122527 Layered Salad [lettuce, torn into small pieces, carrots, gra... [This recipe was in the drawer for some years,... www.cookbooks.com/Recipe-Details.aspx?id=301736 Gathered [sugar, chunky chicken, curry powder, accent, ... www.cookbooks.com 5 338
719239 Grandma'S Chicken Fricassee [1 lb. chopped meat, 1 egg, 1/2 c. seasoned br... [Paula and I collaborated on this recipe, and ... www.cookbooks.com/Recipe-Details.aspx?id=301776 Gathered [american cheese, sugar, noodles, accent, chil... www.cookbooks.com 4 407

2231142 rows × 9 columns

image.png

Some recipe as shown in above figure are those where directions are not available.

Sort Data by No_of_Steps_to_make_recipe¶

In [10]:
df.sort_values(by='No_of_Steps_to_make_recipe')
Out[10]:
title ingredients directions link source NER site No_of_Steps_to_make_recipe No_of_ingredients_in_recipe
1294279 Broccoli Salad [2 c. chopped fresh broccoli, 1/2 c. unsalted ... [Combine first ingredients and fold in with dr... www.epicurious.com/recipes/member/views/brocco... Gathered [fresh broccoli, wine vinegar, dressing, ¼, gr... www.epicurious.com 1 8
1575108 Cape Cod Cocktail [4 ounces, fluid Cranberry Juice, 1 ounce, flu... [Pour the cranberry juice and vodka into a hig... tastykitchen.com/recipes/drinks/cape-cod-cockt... Gathered [lime, fluid vodka, cranberry juice] tastykitchen.com 1 3
1273781 Grandmother Cora Horton'S Millionaire Fudge [4 1/2 c. white sugar, 1 tall can evaporated m... [Chocolate, nuts and marshmallow stuff into a ... www.epicurious.com/recipes/member/views/grandm... Gathered [white sugar, oleo, marshmallow cream, walnuts... www.epicurious.com 1 6
1575110 Vanilla Cinnamon Smoothie [1/2 cups Whole Milk, 3/4 cups Plain Greek Yog... [In a blender, add all ingredients and run on ... tastykitchen.com/recipes/drinks/vanilla-cinnam... Gathered [¼, greek yogurt, cinnamon, milk] tastykitchen.com 1 4
522016 Meringue Cookies [2 large egg whites, 3/4 c. sugar, 1 tsp. vani... [Beat egg whites until foamy. Gradually add su... www.cookbooks.com/Recipe-Details.aspx?id=14161 Gathered [sugar, vanilla, egg whites] www.cookbooks.com 1 3
... ... ... ... ... ... ... ... ... ...
2012938 Making Egg Dough Pastas [DRY, 2 cups all-purpose flour, WET, 9 egg yol... [Measure the flour and shake it through a siev... www.epicurious.com/recipes/food/views/making-e... Recipes1M [extra-virgin olive oil, wet, egg yolk, eggs, ... www.epicurious.com 145 21
1702484 Cannelloni with Asparagus and Ham [2 pounds asparagus, 6 tablespoons butter, 1 c... [Trim 1 inch or more off the butt ends of the ... www.cookstr.com/recipes/cannelloni-with-aspara... Recipes1M [nutmeg, eggs, ham, water, flour, asparagus, m... www.cookstr.com 148 13
2210363 Soft Cheesy Pretzel [1 1/2 cups flour, all-purpose, 2 teaspoons ba... [Combine first 4 ingredients in a medium bowl;... recipeland.com/recipe/v/soft-cheesy-pretzel--5... Recipes1M [baking powder, sugar, eggs, cheddar cheese, f... recipeland.com 151 9
1739457 Mike's NY Cheesecake 101 [30 Graham Crackers (crushed for crust), 4 tbs... [Preheat oven to 350 (175 degrees C)., Triple ... cookpad.com/us/recipes/352287-mikes-ny-cheesec... Recipes1M [mixing bowl, sugar, vanilla, crackers, eggs, ... cookpad.com 163 19
1717085 Moon Cakes [1 1/4 cups sugar, 1/4 plus 1/8 teaspoon citri... [In a small saucepan, combine the sugar, citri... www.epicurious.com/recipes/food/views/moon-cak... Recipes1M [caramel sauce, pork fatback, sugar, eggs, siu... www.epicurious.com 164 27

2231142 rows × 9 columns

Data Quality Checks¶

(1) - Missing Values Analysis¶

In [11]:
# Check for missing values in the dataset
missing_values = df.isnull().sum()
missing_values
Out[11]:
title                          0
ingredients                    0
directions                     0
link                           0
source                         0
NER                            0
site                           0
No_of_Steps_to_make_recipe     0
No_of_ingredients_in_recipe    0
dtype: int64

There is no missing values in dataset, hence no need to handle missing values. In gereneral following methods are used to handle missing values.


  • Deletion: Remove rows or columns with missing values. This can be done if the missing data is sparse and won't significantly impact the analysis.
  • Imputation: Fill in missing values with estimated or calculated values. Common imputation techniques include mean, median, mode imputation, as well as regression imputation and hot deck imputation.
  • Advanced Imputation: Utilize advanced techniques such as multiple imputation, which creates multiple imputed datasets and combines the results, or predictive modeling to estimate missing values based on the relationship with other variables.
  • Indicator Variables: Create a new binary column indicating the presence or absence of missing values in a particular variable. This allows the model to recognize and utilize the missingness pattern as a predictive feature.
  • Domain Knowledge: Utilize domain knowledge to determine meaningful ways to handle missing values. For example, for time series data, missing values may be interpolated based on previous and subsequent values.
  • Model-Based Imputation: Use machine learning algorithms to predict missing values based on other features in the dataset. This approach leverages the relationships between variables to estimate missing values.
  • Treat Missingness as a Separate Category: For categorical variables, treat missing values as a separate category rather than imputing or deleting them. This allows the model to capture any potential information contained in the missingness.
  • </ul></span>

(2) - Check Data Types¶

In [12]:
# Check the data types of columns in the dataset
data_types = df.dtypes
data_types
Out[12]:
title                          object
ingredients                    object
directions                     object
link                           object
source                         object
NER                            object
site                           object
No_of_Steps_to_make_recipe      int64
No_of_ingredients_in_recipe     int64
dtype: object

(3) - Check Unique Values¶

In [13]:
# Check unique values in a specific column
for c in df.columns:
    try:
        unique_values = df[c].nunique()
    except:
        continue
    print(c, " has ",unique_values, " values ")
title  has  1312871  values 
link  has  2231142  values 
source  has  2  values 
site  has  28  values 
No_of_Steps_to_make_recipe  has  127  values 
No_of_ingredients_in_recipe  has  81  values 

(4) - Boxplot Analysis/ Outlier Detection¶

In [14]:
plt.figure(figsize=(16, 6))
sns.boxplot(x=df['No_of_ingredients_in_recipe'])
plt.show()
In [15]:
df['No_of_ingredients_in_recipe'].min(), df['No_of_ingredients_in_recipe'].mean(), df['No_of_ingredients_in_recipe'].max()
Out[15]:
(1, 8.727042474212757, 407)

From above figure it is clear that there are outliers in data for feature No_of_ingredients_in_recipe. The average number of ingredients are around 9, and the maximum number of ingredients are 407, the range is very high, which shows the presence of outliers in data.

Lets Analyze Max number of ingredients recipe¶

In [16]:
max_ingredients = df[df['No_of_ingredients_in_recipe'] == df['No_of_ingredients_in_recipe'].max()]
max_ingredients
Out[16]:
title ingredients directions link source NER site No_of_Steps_to_make_recipe No_of_ingredients_in_recipe
719239 Grandma'S Chicken Fricassee [1 lb. chopped meat, 1 egg, 1/2 c. seasoned br... [Paula and I collaborated on this recipe, and ... www.cookbooks.com/Recipe-Details.aspx?id=301776 Gathered [american cheese, sugar, noodles, accent, chil... www.cookbooks.com 4 407
In [17]:
max_ingredients.values
Out[17]:
array([["Grandma'S Chicken Fricassee",
        list(['1 lb. chopped meat', '1 egg', '1/2 c. seasoned bread crumbs', '2 Tbsp. ketchup', '1 tsp. salt', '1/2 tsp. pepper', '6 to 7 potatoes (medium)', '2 1/2 lb. ham end', '1 c. milk', '4 Tbsp. seasoned bread crumbs', '4 Tbsp. butter', '1 lb. ham steak, cubed', '2 c. celery, sliced', '3/4 c. mayonnaise', '1 can chunky chicken soup', '2 Tbsp. lemon juice', '3 eggs, hard-boiled and cut up', '1 (8 oz.) can mushroom stems and pieces', '1 c. walnuts', '1/2 c. butter', '2 c. stuffing mix', '12 oz. corn bread twists', '1 lb. ground ham', '1 can Cheddar cheese soup', '1 c. seasoned bread crumbs', '1 pkg. onion soup mix', '4 Tbsp. butter, divided', '4 Tbsp. flour, divided', '2 tsp. seasoned salt, divided', '2 to 2 1/2 lb. boneless veal', '1 Tbsp. paprika, divided', '1 can chicken broth', '1/2 lb. mushrooms, sliced', '2 medium onions, sliced', '1/4 c. steak sauce', 'hot buttered noodles', '1 roast beef (with or without bones, any size)', '8 oz. ditalini macaroni', '1 (15 oz.) can garbanzo beans', '2 c. cooked chicken, cubed', '8 oz. sour cream', '5 oz. Cheddar cheese soup', '1 tsp. oregano', '1 tsp. garlic powder', '1/2 tsp. seasoned salt', '1/2 tsp. black pepper', '1/2 tsp. soy sauce', '2 to 3 lb. meat loaf mix (1 lb. beef, 1/2 lb. pork and 1/2 lb. veal)', '2 to 3 eggs', '1 c. Italian style bread crumbs', '1/2 c. grated Parmesan cheese', '1 Tbsp. oregano', 'spaghetti sauce', '1 lb. sweet Italian sausage (optional)', '1 1/2 c. instant rice', '1 can chicken broth', '1 lb. shrimp', '1 lb. smoked sausage (link)', '1 c. green pepper, cut in medium pieces', '1 c. celery, sliced diagonally', '1 c. onions, cut fine', '1 (1 lb.) can stewed tomatoes, cut into pieces', '2 cans condensed tomato soup', '6 to 8 drops hot pepper sauce', '1/2 tsp. black pepper', '1 tsp. seasoned salt', '1 tsp. garlic powder', '1 1/2 lb. bulk sweet sausage', '1 (32 oz.) jar spaghetti sauce', '1 can condensed cream of tomato soup', '1 green pepper, cut into small pieces', '1 medium onion, diced fine', '6 lasagne noodles', '1 small eggplant', '4 to 6 Tbsp. olive oil', '1 c. grated Parmesan cheese, divided', '1/4 lb. Mozzarella cheese, sliced thin', '1 medium head cabbage', '1 tsp. salt', '1/2 tsp. pepper', '2 lb. meat loaf mix', '2 pkg. onion soup mix', '1/2 c. instant rice', '2 cans condensed tomato soup', '2 cans water', '1 lb. meat loaf mix (beef, pork and veal)', '1/2 medium onion, chopped fine', '1/2 c. seasoned bread crumbs', '1 egg', '1/4 c. Parmesan cheese, grated', '1/4 c. milk', '1/2 tsp. garlic powder', '1/2 tsp. seasoned salt', '1/4 tsp. pepper', '1 Tbsp. olive oil', '1 jar chicken gravy', '1 c. sour cream', '1 Tbsp. paprika', '1 1/2 lb. ground chuck', '1 c. seasoned bread crumbs', '1 egg', '2 green onions, cut small', '1/2 c. spaghetti sauce', '1/4 tsp. black pepper', '1 (16 oz.) cream-style corn', '1 pkg. Knorr onion soup mix', '6 large green peppers or 8 medium', '1 1/4 lb. ground lamb', '2 c. instant rice (uncooked)', '1 can cream of mushroom soup', '1 (2.62 oz.) jar McCormick Salad Supreme seasoning', '1 large jar spaghetti sauce, divided', '1 lb. lean ground beef', '1 onion, chopped', '2 (16 oz.) cans kidney beans with sauce', '1 (16 oz.) jar salsa (mild)', '1 Tbsp. chili powder', '1 1/2 lb. top round steak, sliced thin', '4 oz. sliced mushrooms (fresh or canned)', '2 Tbsp. butter or margarine', '1 pkg. Lipton dry onion soup mix', '1 Tbsp. soy sauce', '2 Tbsp. flour', '1 c. milk', '1 c. water', '1 1/2 c. green peppers, sliced thin', '2 pork cutlets or boned pork chops', 'cooking oil', 'Accent', 'garlic powder', 'black pepper', '1/2 c. green pepper, in strips', '1/2 c. onion, diced', '1/2 c. celery, sliced', '1 c. stewed tomatoes', '1 c. tomato sauce', '1 Tbsp. basil', '1 (2 lb.) pkg. sauerkraut', '2 lb. spareribs', '2 apples', '1 pkg. onion soup mix', '1 Tbsp. caraway seed', 'salt', 'pepper', 'sugar', "3 to 4 lb. brisket or top round (If you're not sure, ask the butcher or clerk in the supermarket for a pot roast.)", '1 bottle Heinz chili sauce', '1 pkg. onion soup mix (Knorr or Lipton)', '1 can or bottle beer', '1 qt. sauerkraut, drained', '8 oz. noodles, cooked according to pkg. instructions', '2 cans condensed cream of chicken soup', '1 tsp. dry mustard', '1/2 c. mayonnaise', '1/2 c. chopped onions', '16 oz. corned beef (canned is good)', '8 oz. Swiss cheese, grated', '1/2 c. rye bread crumbs*', '2 cans water', '1 lb. super lean ground beef or ground veal', '1 Tbsp. olive oil', '1 (48 oz.) jar Ragu spaghetti sauce with meat', '1 (8 oz.) can tomato sauce', '1 lb. rigatoni, cooked and drained', '1 Tbsp. oregano', '1 tsp. salt', '1 tsp. garlic powder', '1 tsp. onion powder', '1/4 tsp. black pepper', '12 oz. small curd cottage cheese', '1 lb. Mozzarella cheese, sliced', '2 oz. Parmesan cheese', '4 chicken breast halves', '1 egg', '2 Tbsp. milk', 'salt', 'pepper', 'garlic powder', 'corn flakes', '3 oz. potato pancake mix', '2 eggs', '1 c. water', '1 (15 to 16 oz.) can salmon', '1 green onion, cut into small pieces', '1 Tbsp. parsley', '1 tsp. garlic powder', '1/2 tsp. pepper', 'cooking oil', '1 lb. scallops (fresh are best)', '2 Tbsp. butter', '1 tsp. lemon juice', '1 c. Italian seasoned bread crumbs', '1 c. grated American cheese', '1/2 lb. spinach linguini or fettucini', '1/4 c. butter', '1 tsp. garlic', '1/8 tsp. freshly ground black pepper', '1 c. half and half', '1/2 lb. Sea Stix (salad style)', '1/2 c. Parmesan cheese', '1/2 lb. shrimp', '1/2 lb. scallops', '1/2 lb. crab meat or Sea Stix', '1/2 lb. mushrooms, sliced', '1/4 lb. butter', '1/2 pt. heavy cream', '2 Tbsp. sherry', '1 c. sharp Cheddar cheese', '1 tsp. Worcestershire sauce', '1 tsp. garlic powder', '2 Tbsp. flour', '2 Tbsp. bread crumbs', '12 frozen pastry shells', '1 lb. boneless, skinless chicken breasts', '2 egg yolks', '1 c. ice water', '1 c. flour', '2 Tbsp. sesame oil', '1 Tbsp. soy sauce', '2 Tbsp. honey', '2 Tbsp. sweet and sour sauce', '1 Tbsp. sesame seed', '3/4 lb. shrimp', '2 Tbsp. butter', '1 tsp. garlic powder*', '2 Tbsp. seasoned bread crumbs', 'oregano', '8 oz. spaghetti', '2 Tbsp. butter', '1/2 c. grated Parmesan cheese', '2 eggs, well beaten', '12 oz. cottage cheese', '1 1/4 to 1 1/2 lb. meat loaf mix (beef, veal and pork)', '1/2 c. onion, chopped', '1/2 c. green pepper, chopped', '1 (8 oz.) can sliced tomatoes', '1 (8 oz.) can tomato sauce', '1 tsp. oregano', '1/2 tsp. garlic powder', '1/2 c. shredded Mozzarella cheese', '12 lasagna noodles', '2 Tbsp. olive oil', '2 lb. ground beef', '1/2 c. onion, minced', '2 (6 oz.) cans tomato paste', '1 (15 oz.) can tomato sauce', '2 tsp. basil', '2 tsp. parsley', '2 tsp. sugar', '1 tsp. salt', '1 tsp. oregano', '1/2 tsp. garlic salt', '1/4 tsp. pepper', '16 oz. Ricotta', '8 oz. Mozzarella, sliced', '1 tsp. salt', '1/2 tsp. pepper', '1 lb. boneless pork', '1/2 c. soy sauce', '1 tsp. ginger', '16 oz. Japanese style vegetables or 2 pkg. stir-fry Japanese style vegetables', 'chow mein noodles', '6 green peppers', '1 1/2 lb. meat loaf mix (beef, veal and pork)', '4 eggs', '3/4 c. instant rice', '3/4 c. grated cheese', '1 1/2 tsp. black pepper', '1 1/2 tsp. garlic powder', '1 1/2 tsp. oregano', '1 large jar spaghetti sauce', '1 lb. jumbo shells', '1 1/2 lb. ground beef', '1 medium onion, chopped', '1 egg', '1/2 c. seasoned bread crumbs', '1/4 c. grated Parmesan cheese', '1 tsp. salt', '1/2 tsp. pepper', '16 oz. spaghetti sauce', '3 large zucchini', '2 Tbsp. olive oil', '1 1/4 to 1 1/2 lb. meat loaf mix (beef, pork and veal)', '1 large onion', '1 Tbsp. oregano', '1 tsp. garlic powder', '1 (6 oz.) pkg. chicken flavored rice pilaf mix', '1 (32 oz.) jar spaghetti sauce, divided', '1 c. grated Parmesan cheese', '4 Tbsp. cooking oil', '1 Tbsp. garlic powder', '1 1/4 to 1 1/2 lb. boneless pork, cut into 1-inch cubes', '1 medium onion, chopped', '1 green pepper, cut into 1-inch pieces', '1/2 red pepper (sweet), cut into 1-inch pieces', '1 (15 oz.) can pineapple chunks in natural syrup', '1/4 c. honey', '1/4 c. soy sauce', '2 Tbsp. cider vinegar', '2 to 3 Tbsp. cornstarch', 'chow mein noodles', '1/2 lb. wide noodles', '1 (12 oz.) can Cheddar cheese soup', '6 oz. milk', '1 (5 oz.) can evaporated milk', '2 Tbsp. butter', '2 (6 oz.) cans solid white tuna', '1/2 sweet red pepper, cut up into small pieces', '1 (8 oz.) can sweet peas, drained', '1 Tbsp. Dijon mustard', '1/2 tsp. black pepper', '1 (3 oz.) can French fried onions', '2 medium onions', '1/4 c. butter', '2 (6 1/2 oz.) cans tuna, drained', '2 c. shredded Cheddar, divided', '1 c. baking mix', '1 1/4 c. milk', '1 tsp. seafood seasoning', '3 eggs', '2 medium tomatoes, thinly sliced', '1 (8 oz.) pkg. frozen green peas in cream sauce', '1 c. milk', '2 c. turkey, cubed', '1 1/2 c. mushrooms, sliced', '1 Tbsp. butter', '1 Tbsp. grated Parmesan cheese', '1/2 tsp. garlic powder', '1/2 tsp. black pepper', '1 pkg. Betty Crocker fettuccine Alfredo Tuna Helper', '1 c. cooked turkey or chicken, cut into bite size pieces', '2 lb. veal boneless shoulder, cubed', '2 Tbsp. olive oil', '2 large green peppers, cut into strips', '1 large red pepper, cut into strips', '4 green onions, cut into small pieces', '1/2 lb. mushrooms, sliced', '1 (32 oz.) jar spaghetti sauce', '2 Tbsp. grated Parmesan cheese', '1 tsp. oregano', '3 large zucchini', '4 Tbsp. olive oil, divided', '1 (32 oz.) jar spaghetti sauce, divided', '1 1/2 lb. meat loaf mix (beef, pork and veal)', '1/2 c. chopped onion', '1 Tbsp. oregano', '1 tsp. garlic powder', '6 oz. chicken flavored rice pilaf mix', '1/4 lb. sliced Mozzarella cheese', '1 c. grated Parmesan cheese', '2 lb. fresh asparagus spears or canned asparagus', '1 (10 oz.) can cream of asparagus soup', '1/2 c. sour cream', '1 can French fried onions', '1 lb. frozen cauliflower', '2 (1 lb.) cans asparagus pieces', '1 medium onion, diced fine', '1 can condensed mushroom soup', '4 oz. grated Cheddar cheese', '2 oz. slivered almonds', '1 head broccoli*', '1 c. water', '1/2 tsp. salt', '1 (10 oz.) can broccoli cheese soup', '2 Tbsp. water', '4 oz. sour cream', '1/2 can French fried onions', '1 lb. frozen carrots, sliced', '1/4 lb. butter (1 stick)', '1 (8 oz.) can tomato sauce', '1/4 c. sugar', '1 green onion, diced', '1/4 green pepper, diced', '1 stalk celery, sliced', '1 lb. frozen cauliflower florets', '3 Tbsp. butter', '2 Tbsp. seasoned bread crumbs', '1 Tbsp. sherry', '2 Tbsp. grated cheese', '1 lb. frozen cauliflower florets', '1 can cream of mushroom soup', '4 oz. sour cream', '1/2 c. milk', '1/4 tsp. black pepper', '1/4 tsp. garlic powder', '2 Tbsp. grated Parmesan cheese', '1 (2.8 oz.) can French fried onions, divided', '1 lb. fresh asparagus', '1 Tbsp. sesame seed', '2 Tbsp. butter', '1 tsp. sesame oil', '1 tsp. soy sauce', '4 slices bacon', '2 green onions', '1 (10 oz.) pkg. frozen carrots and peas', '1/2 c. water', '1 1/2 c. shredded lettuce', '1 Tbsp. parsley flakes', '1 tsp. seasoned salt', '1 tsp. garlic powder', '1/2 tsp. pepper', '1/4 c. butter', '1/2 c. onions, chopped', '4 c. cabbage, sliced', '1/2 tsp. salt', '1/4 tsp. pepper', '2 chicken bouillon cubes, crushed']),
        list(['Paula and I collaborated on this recipe, and it was fun.', 'This dish is quite different from my usual fare, but my wonderful wife said it is popular in mill towns.', "I'll list it under vegetables, but it could be used as a main dish.", 'Try it, I think you will be quite pleased.']),
        'www.cookbooks.com/Recipe-Details.aspx?id=301776', 'Gathered',
        array(['american cheese', 'sugar', 'noodles', 'accent', 'chili powder',
               'potato pancake mix', 'green onion', 'sweet and', 'baking mix',
               'sweet red pepper', 'almonds', 'sweet peas', 'cabbage', 'rigatoni',
               'frozen carrots', 'garlic salt', 'water', 'walnuts', 'ground beef',
               'mustard', 'fettucini', 'sour cream', 'wide noodles',
               'ditalini macaroni', 'cream of mushroom soup', 'lasagna noodles',
               'red pepper', 'ground chuck', 'onions', 'eggplant', 'milk',
               'apples', 'sherry', 'cottage cheese', 'beef', 'carrots',
               'fresh asparagus spears', 'jumbo shells', 'onion soup', 'salmon',
               'parsley flakes', 'pork cutlets', 'pepper sauce',
               'salad supreme seasoning', 'shrimp', 'garlic',
               'freshly ground black pepper', 'pastry shells', 'meat',
               'soy sauce', 'mein noodles', 'chicken gravy', 'tuna',
               'italian style bread crumbs', 'swiss cheese', 'turkey',
               'bread crumbs', 'pineapple', 'chopped meat', 'potatoes',
               'kidney beans', 'sweet italian sausage', 'steak sauce',
               'shredded lettuce', 'tomato soup', 'basil', 'garlic powder',
               'super lean ground beef', 'cheddar', 'chicken bouillon cubes',
               'chicken', 'green pepper', 'honey', 'ground lamb', 'spaghetti',
               'ground ham', 'oregano', 'corn', 'cooking oil', 'solid white tuna',
               'japanese style vegetables', 'corned beef', 'grated cheese',
               'spaghetti sauce', 'frozen cauliflower', 'cheddar cheese soup',
               'worcestershire sauce', 'stuffing mix', 'mix', 'scallops',
               'broccoli cheese soup', 'eggs', 'mozzarella cheese', 'rye bread',
               'salad style', 'cornstarch', 'broccoli', 'butter',
               'cream of asparagus soup', 'chicken breasts', 'tomato paste',
               'mozzarella', 'ginger', 'celery', 'sausage', 'helper',
               'lemon juice', 'cheddar cheese', 'onion powder', 'onion',
               'chicken soup', 'crab meat', 'flour', 'chicken broth', 'paprika',
               'onion soup mix', 'salt', 'black pepper', 'heavy cream', 'brisket',
               'spareribs', 'frozen green peas', 'corn bread',
               'hot buttered noodles', 'instant rice', 'dry mustard', 'ham steak',
               'boneless veal', 'sesame seed', 'egg', 'condensed cream',
               'tomatoes', 'bulk sweet sausage', 'garbanzo beans', 'corn flakes',
               'fresh asparagus', 'head cabbage', 'cheese', 'cider vinegar',
               'ham end', 'tomato sauce', 'zucchini', 'mushroom stems',
               'chili sauce', 'parmesan cheese', 'mushrooms', 'salsa',
               'sesame oil', 'sauerkraut', 'green onions',
               'chicken flavored rice pilaf mix', 'thin', 'ricotta', 'ketchup',
               'egg yolks', 'olive oil', 'bacon', 'pepper', 'green peppers',
               'caraway seed', 'seafood seasoning', 'lean ground beef',
               'mayonnaise', 'parsley', 'italian seasoned bread crumbs',
               'condensed mushroom soup', 'boneless pork'], dtype='<U31')         ,
        'www.cookbooks.com', 4, 407]], dtype=object)

image.png

In some recipes as shown in above image, does not have proper directions (description) is written in some recipe.

In [18]:
df.head(1).values
Out[18]:
array([['No-Bake Nut Cookies',
        list(['1 c. firmly packed brown sugar', '1/2 c. evaporated milk', '1/2 tsp. vanilla', '1/2 c. broken nuts (pecans)', '2 Tbsp. butter or margarine', '3 1/2 c. bite size shredded rice biscuits']),
        list(['In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine.', 'Stir over medium heat until mixture bubbles all over top.', 'Boil and stir 5 minutes more. Take off heat.', 'Stir in vanilla and cereal; mix well.', 'Using 2 teaspoons, drop and shape into 30 clusters on wax paper.', 'Let stand until firm, about 30 minutes.']),
        'www.cookbooks.com/Recipe-Details.aspx?id=44874', 'Gathered',
        array(['bite size shredded rice biscuits', 'vanilla', 'brown sugar',
               'nuts', 'milk', 'butter'], dtype='<U32')                     ,
        'www.cookbooks.com', 6, 6]], dtype=object)
In [19]:
plt.figure(figsize=(16, 6))
sns.boxplot(x=df['No_of_Steps_to_make_recipe'])
plt.show()
In [20]:
df['No_of_Steps_to_make_recipe'].min(), df['No_of_Steps_to_make_recipe'].mean(), df['No_of_Steps_to_make_recipe'].max()
Out[20]:
(1, 6.61362790893632, 164)

The average number of steps to make recipe is 7, but there maximum value is 164. This high rangle of average and maximum and above box plot shows that there are outliers means some recipe (a very small portion) takes very long time as compared to most of the recipes.

Issue with Textual Data¶

image.png

Textual data such as ingredient names contains unnecessary characters or symbols which needs to be removed before analysis.

Exploratory Data Analysis (EDA)¶

In [21]:
def labeled_barplot(data, feature, perc=False, n=None, title=None):
    """
    Barplot with percentage at the top

    data: dataframe
    feature: dataframe column
    perc: whether to display percentages instead of count (default is False)
    n: displays the top n category levels (default is None, i.e., display all levels)
    """

    total = len(data[feature])  # length of the column
    count = data[feature].nunique()
    if n is None:
        plt.figure(figsize=(count + 2, 6))
    else:
        plt.figure(figsize=(n + 2, 6))

    plt.xticks(rotation=90, fontsize=15)
    ax = sns.countplot(
        data=data,
        x=feature,
        palette="Paired",
        order=data[feature].value_counts().index[:n],
    )

    for p in ax.patches:
        if perc == True:
            label = "{:.1f}%".format(
                100 * p.get_height() / total
            )  # percentage of each class of the category
        else:
            label = p.get_height()  # count of each level of the category

        x = p.get_x() + p.get_width() / 2  # width of the plot
        y = p.get_height()  # height of the plot

        ax.annotate(
            label,
            (x, y),
            ha="center",
            va="center",
            size=12,
            xytext=(0, 5),
            textcoords="offset points",
        )  # annotate the percentage
    
    plt.title(title)
    plt.show()  # show the plot
In [22]:
labeled_barplot(df, "title", perc=True, n=20, title="Top 20 Recipes")
In [17]:
df[df['title'] == "Chicken Casserole"]
Out[17]:
title ingredients directions link source NER site No_of_Steps_to_make_recipe No_of_ingredients_in_recipe
63 Chicken Casserole [1 can cream of mushroom soup, 1 can cream of ... [Mix all ingredients together in baking dish.,... www.cookbooks.com/Recipe-Details.aspx?id=665397 Gathered [cream of chicken soup, cream of mushroom soup... www.cookbooks.com 3 6
131 Chicken Casserole [1/2 c. raw rice, 1 can French onion soup, 1 c... [Mix all together; pour into a 7 x 12-inch cas... www.cookbooks.com/Recipe-Details.aspx?id=661800 Gathered [cream of chicken, onion soup, mushrooms, bite... www.cookbooks.com 3 5
356 Chicken Casserole [1 1/2 c. uncooked rice, 1 envelope onion soup... [Spread rice in bottom of a 9 x 13-inch glass ... www.cookbooks.com/Recipe-Details.aspx?id=419575 Gathered [onion soup, cream of mushroom soup, water, ri... www.cookbooks.com 4 5
428 Chicken Casserole [1 large pkg. spinach noodles, cooked in broth... [Cook chicken. Saute celery, pepper and onion.... www.cookbooks.com/Recipe-Details.aspx?id=7283 Gathered [velveeta cheese, chicken breasts, spinach noo... www.cookbooks.com 1 8
539 Chicken Casserole [4 c. cooked chicken, 1 c. chopped celery, 1 c... [Mix first nine ingredients., Saute 5 minutes ... www.cookbooks.com/Recipe-Details.aspx?id=115061 Gathered [cream of chicken soup, slivered almonds, lemo... www.cookbooks.com 4 12
... ... ... ... ... ... ... ... ... ...
2155999 Chicken Casserole [1 lb Chicken Breast (cubed), 5 Carrots (slice... [Preheat oven to 350F, Boil carrots, Cube chic... cookpad.com/us/recipes/346285-chicken-casserole Recipes1M [cream of chicken soup, sour cream, corn, ranc... cookpad.com 8 12
2169171 Chicken Casserole [1 (6 ounce) packagestove top chicken flavor s... [Preheat oven to 350 degrees F., Prepare stuff... www.food.com/recipe/chicken-casserole-268828 Recipes1M [velveeta cheese, condensed cream, chicken fla... www.food.com 6 4
2199641 Chicken Casserole [2 cups cooked egg noodles, 2 (10 3/4 ounce) c... [Preheat oven to 350., In a large bowl, toss t... www.food.com/recipe/chicken-casserole-228128 Recipes1M [pimiento, cream of mushroom soup, sherry wine... www.food.com 5 10
2214015 Chicken Casserole [1 12 loaves artisan bread (example-sourdough ... [At least 8 hours before you plan to serve thi... www.food.com/recipe/chicken-casserole-522180 Recipes1M [bread, chicken breasts, unsalted butter, heav... www.food.com 25 17
2226534 Chicken Casserole [1 tablespoon butter, 1 tablespoon olive oil, ... [Saute leek, onion and garlic in melted butter... www.food.com/recipe/chicken-casserole-171504 Recipes1M [corn, ground cumin, chicken breasts, potato, ... www.food.com 10 18

4099 rows × 9 columns

In [154]:
labeled_barplot(df, "source", perc=True, n=2, title="source")
In [158]:
labeled_barplot(df, "site", perc=True, n=20, title="Top 20 site")
In [18]:
labeled_barplot(df, "No_of_Steps_to_make_recipe", perc=True, n=20, title="Top 20 No_of_Steps_to_make_recipe")

Most of the recipes required around 1 to 8 steps to make recipes. THere are some recipes which required large number of steps.

In [19]:
labeled_barplot(df, "No_of_ingredients_in_recipe", perc=True, n=20, title="Top 20 No_of_ingredients_in_recipe")

Most recipes consist of ingredients range from 4 to 11. Some recipes consist of very large number of ingredients.

Importance of Ingredients¶

Here i have plotted the ingredients and their usage in recipes, which shows the importance of recipes. Egg,sugar and salt being the most important ingredients used in recipes.

In [20]:
ner = df.ingredients.explode().reset_index()
In [21]:
labeled_barplot(ner, "ingredients", perc=True, n=20, title="Top 20 ingredients")

Identify All Ingredients Used¶

Use the explode() method and then call the unique() method.

In [22]:
ingredients = df.ingredients.explode().unique()
ingredients
Out[22]:
array(['1 c. firmly packed brown sugar', '1/2 c. evaporated milk',
       '1/2 tsp. vanilla', ..., '1/2 cup very cold seltzer water',
       '1 cup neutral oil, like canola',
       '1/2 cup freshly grated Pecorino Romano cheese, plus more for sprinkling'],
      dtype=object)
In [23]:
print(f"Total Unique Ingredients are {len(ingredients)}")
Total Unique Ingredients are 4682802

Word Cloud Analysis of Recipes¶

In [130]:
stopwords = ['dtype', 'array', 'of', 'the', 'in', 'with', 'or', 'whole', 'all', 'a', 'that', 'using', 'at',
            'and','for','to','until','is', 'then', 'on',"'","U22'","U13'","U32'","U27'","U37'"]

def minimal_wordcloud(df, column):
    text = str(df[column].values)
    wordcloud = WordCloud(width=1000, height=500, stopwords = stopwords).generate(text)
    image = wordcloud.to_image()
    return image

Top/ Important Recipes¶

In [159]:
minimal_wordcloud(df, 'title')
Out[159]:

Top/ Important Ingredients/ Units/ Quantities¶

In [160]:
minimal_wordcloud(df, 'ingredients')
Out[160]:

Top/ Important Directions Words¶

In [161]:
minimal_wordcloud(df, 'directions')
Out[161]:

Top/ Important Words in NER¶

In [134]:
minimal_wordcloud(df, 'NER')
Out[134]:

For 100 common ingredients¶

  • Find the other ingredients they are most often used with each other.
In [36]:
ingredient_uses = df[["title", "NER"]].explode("NER")
ingredient_uses = ingredient_uses.rename(columns={"NER": "ingredient"})
ingredient_uses
Out[36]:
title ingredient
0 No-Bake Nut Cookies bite size shredded rice biscuits
0 No-Bake Nut Cookies vanilla
0 No-Bake Nut Cookies brown sugar
0 No-Bake Nut Cookies nuts
0 No-Bake Nut Cookies milk
... ... ...
2231141 Polpette in Spicy Tomato Sauce tomato sauce
2231141 Polpette in Spicy Tomato Sauce garlic
2231141 Polpette in Spicy Tomato Sauce sausage
2231141 Polpette in Spicy Tomato Sauce bread crumbs
2231141 Polpette in Spicy Tomato Sauce salt

18420503 rows × 2 columns

Pick 20 common ingredients¶

This portion shows the ingredient and top 10 similar or related ingredients.

In [169]:
all_ingredients = ingredient_uses.ingredient.value_counts().head(20).index.tolist()
for i in all_ingredients:
    # Find id's of recipes using the ingredient
    id_list = ingredient_uses[ingredient_uses.ingredient == i].index.unique()
    # Find all rows for the above id's and do value_counts on those rows
    print(i, "with", ingredient_uses[ingredient_uses.index.isin(id_list)].ingredient.value_counts().head(10).index.tolist()[1:])
    print()
salt with ['flour', 'sugar', 'butter', 'eggs', 'onion', 'garlic', 'milk', 'water', 'pepper']

sugar with ['salt', 'flour', 'eggs', 'vanilla', 'butter', 'milk', 'water', 'baking powder', 'baking soda']

butter with ['salt', 'flour', 'sugar', 'eggs', 'milk', 'vanilla', 'brown sugar', 'onion', 'baking powder']

flour with ['salt', 'sugar', 'eggs', 'butter', 'vanilla', 'milk', 'baking powder', 'baking soda', 'egg']

eggs with ['salt', 'sugar', 'flour', 'butter', 'vanilla', 'milk', 'baking powder', 'baking soda', 'brown sugar']

onion with ['salt', 'garlic', 'pepper', 'water', 'butter', 'tomatoes', 'celery', 'olive oil', 'milk']

garlic with ['salt', 'onion', 'olive oil', 'tomatoes', 'water', 'butter', 'pepper', 'parsley', 'oregano']

milk with ['salt', 'sugar', 'flour', 'butter', 'eggs', 'vanilla', 'baking powder', 'onion', 'egg']

water with ['salt', 'sugar', 'flour', 'onion', 'butter', 'garlic', 'eggs', 'milk', 'pepper']

vanilla with ['sugar', 'flour', 'eggs', 'salt', 'butter', 'milk', 'baking powder', 'baking soda', 'brown sugar']

olive oil with ['salt', 'garlic', 'onion', 'tomatoes', 'water', 'pepper', 'parsley', 'butter', 'lemon juice']

pepper with ['salt', 'onion', 'garlic', 'butter', 'milk', 'water', 'olive oil', 'flour', 'tomatoes']

brown sugar with ['salt', 'flour', 'butter', 'vanilla', 'eggs', 'sugar', 'baking soda', 'cinnamon', 'baking powder']

egg with ['salt', 'flour', 'sugar', 'butter', 'milk', 'vanilla', 'baking powder', 'baking soda', 'brown sugar']

tomatoes with ['salt', 'garlic', 'onion', 'olive oil', 'water', 'pepper', 'onions', 'oregano', 'parsley']

baking powder with ['flour', 'salt', 'sugar', 'eggs', 'vanilla', 'butter', 'milk', 'baking soda', 'egg']

lemon juice with ['salt', 'sugar', 'garlic', 'butter', 'olive oil', 'water', 'flour', 'onion', 'eggs']

cinnamon with ['sugar', 'salt', 'flour', 'eggs', 'butter', 'vanilla', 'brown sugar', 'nutmeg', 'baking soda']

sour cream with ['salt', 'butter', 'onion', 'sugar', 'eggs', 'flour', 'cheddar cheese', 'cream cheese', 'garlic']

baking soda with ['flour', 'salt', 'sugar', 'eggs', 'vanilla', 'butter', 'baking powder', 'brown sugar', 'egg']

Network Analysis of Ingredients¶

  • I have used only subset of the data for this analysis, because actual data was very large
  • I have considered only top 20 ingredients and their corelated top 10 ingredients.
In [173]:
all_ingredients = ingredient_uses.ingredient.value_counts().head(20).index.tolist()
In [179]:
network_data = []

for i in all_ingredients:
    
    # Find id's of recipes using the ingredient
    id_list = ingredient_uses[ingredient_uses.ingredient == i].index.unique()
    
    # Find all rows for the above id's and do value_counts on those rows
    for j in ingredient_uses[ingredient_uses.index.isin(id_list)].ingredient.value_counts().head(10).index.tolist()[1:]:
        i = " ".join(re.findall(r'[a-zA-Z]+', i))
        j = " ".join(re.findall(r'[a-zA-Z]+', j))
        network_data.append([i, j])
In [180]:
network_data = pd.DataFrame(data=network_data, columns=['Ingredient', 'Related Ingredients'])

network_data.head()
Out[180]:
Ingredient Related Ingredients
0 salt flour
1 salt sugar
2 salt butter
3 salt eggs
4 salt onion
In [182]:
network_data = network_data.drop_duplicates()
network_data = network_data[network_data["Ingredient"] != network_data["Related Ingredients"]]
In [203]:
def plot_network(dataframe, ingredient_name=None):
    # Create a directed graph
    G = nx.DiGraph()
    
    if ingredient_name is not None:
        dataframe = dataframe[dataframe['Ingredient']==ingredient_name]
    else:
        ingredient_name = ""
        
    # Add nodes and edges from the dataframe
    for index, row in dataframe.iterrows():
        ingredient = row['Ingredient']
        related_ingredients = row['Related Ingredients']
        G.add_node(ingredient, label=ingredient)  # Add 'label' attribute to store the node name
        G.add_node(related_ingredients, label=related_ingredients)  # Add 'label' attribute to store the node name
        G.add_edge(ingredient, related_ingredients)

    # Plot the network
    plt.figure(figsize=(20, 13))
    pos = nx.spring_layout(G, seed=42)
    node_labels = nx.get_node_attributes(G, 'label')
    nx.draw_networkx(G, pos, with_labels=True, node_size=5000, node_color='lightblue', edge_color='gray', font_size=12, labels=node_labels)  # Use 'labels' argument to display node names
    plt.title("Related Ingredients of "+ingredient_name)
    
    # Show the plot
    plt.axis('off')
    plt.show()

plot_network(network_data)
In [204]:
plot_network(network_data, "soda")

Recipe Recommendation based on Ingredients¶

image.png

Calculate TF=IDF of the recipes Ingredients¶

image.png

I have used only 10000 records for recommendation system

In [205]:
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(df["NER"].astype(str)[1:-1][:10000])

Find Cosine Similarity Matrix¶

image.png

In [207]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
In [208]:
df_small = df.reset_index()[:10000]
titles = df_small['title']
indices = pd.Series(df_small.index, index=df_small['title'])
In [209]:
df_small['title'].values
Out[209]:
array(['No-Bake Nut Cookies', "Jewell Ball'S Chicken", 'Creamy Corn', ...,
       'Chicken Casserole', 'Sweet Potatoes Casserole', '7 Layer Salad'],
      dtype=object)
In [210]:
def get_recommendations(title, no_of_recipes):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]
    recipe_indices = [i[0] for i in sim_scores]
    return titles.iloc[recipe_indices].head(no_of_recipes)

System will recommend Top N recipes based on ingredients similarity¶

Recommed Similar Recipes to Sweet Potatoes Casserole¶

In [211]:
recommended_recipes = get_recommendations("Sweet Potatoes Casserole", 10)
recommended_recipes
Out[211]:
5359       The "Bestest" Banana Bread
7703                    Custard Sauce
623             Baked Pork Tenderloin
3164    Broccoli And Cheese Casserole
3842                 Strawberry Pizza
1952                    Pumpkin Bread
5075            Crustless Egg Custard
2005           Cathedral Window Candy
2385                    Quick Dessert
5969                  Chicken Lasagna
Name: title, dtype: object

Recommed Similar Recipes to Creamy Corn¶

In [212]:
recommended_recipes = get_recommendations("Creamy Corn", 5)
recommended_recipes
Out[212]:
8100               Peanut Butter Pie
3695    Baked Halibut, Italian Style
9641           Chinese Chicken Wings
446            Sour Cream Pound Cake
9470              Broccoli Casserole
Name: title, dtype: object

Which Recipe I can make?¶

If in my kitchen, I have following list of ingredients let say:-

  • 1 c. firmly packed brown sugar
  • 1/2 c. evaporated milk
  • 1/2 tsp. vanilla

Which recipe I can make out of available ingredients? this system will help us to assist in making recipes.

image.png

In [55]:
# Recipe data
recipe_data = df.head(1000)

def find_best_recipe(ingredients):
    best_recipe = None
    best_match_count = 0

    for _, recipe in recipe_data.iterrows():
        recipe_ingredients = recipe['ingredients']
        match_count = sum(1 for ingredient in ingredients if ingredient in recipe_ingredients)

        if match_count > best_match_count:
            best_recipe = recipe
            best_match_count = match_count

    return best_recipe

def print_recipe_directions(recipe, ingredients):
    directions = recipe['directions']
    
    print("You have ingredients:\n")
    print("\n".join(ingredients))
    print("\n")
    print("If you arrange following ingredient(s) then you can make Recipe:\n")
    
    extra = set(recipe['ingredients']).difference(set(ingredients))
    print("\n".join(extra))
    print("\n")
    
    print("Recipe Directions:\n")
    print("\n".join(directions))

Show Recipe for available Ingredients¶

In [57]:
# List of ingredients
input_ingredients = ['1 c. firmly packed brown sugar', '1/2 c. evaporated milk', '1/2 tsp. vanilla']

# Find the best recipe
best_recipe = find_best_recipe(input_ingredients)

# Print the best recipe directions
if best_recipe is not None:
    print_recipe_directions(best_recipe, input_ingredients)
else:
    print("No matching recipe found.")
You have ingredients:

1 c. firmly packed brown sugar
1/2 c. evaporated milk
1/2 tsp. vanilla


If you arrange following ingredient(s) then you can make Recipe:

1/2 c. broken nuts (pecans)
3 1/2 c. bite size shredded rice biscuits
2 Tbsp. butter or margarine


Recipe Directions:

In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine.
Stir over medium heat until mixture bubbles all over top.
Boil and stir 5 minutes more. Take off heat.
Stir in vanilla and cereal; mix well.
Using 2 teaspoons, drop and shape into 30 clusters on wax paper.
Let stand until firm, about 30 minutes.

Recommend Recipe for Ingredients¶

  • 1 box powdered sugar
  • 8 oz. soft butter
  • 1 (8 oz.) peanut butter
  • paraffin
In [54]:
# List of ingredients
input_ingredients = ['1 box powdered sugar', '8 oz. soft butter', '1 (8 oz.) peanut butter', 'paraffin']

# Find the best recipe
best_recipe = find_best_recipe(input_ingredients)

# Print the best recipe directions
if best_recipe is not None:
    print_recipe_directions(best_recipe, input_ingredients)
else:
    print("No matching recipe found.")
You have ingredients:

1 box powdered sugar
8 oz. soft butter
1 (8 oz.) peanut butter
paraffin


If you arrange following ingredients then you can make Recipe:

12 oz. chocolate chips


Recipe Directions:

Mix sugar, butter and peanut butter.
Roll into balls and place on cookie sheet.
Set in freezer for at least 30 minutes. Melt chocolate chips and paraffin in double boiler.
Using a toothpick, dip balls 3/4 of way into chocolate chip and paraffin mixture to make them look like buckeyes.

Generative AI for Directions Generation¶

One possible approach for generative AI is to use a text generation model called a Recurrent Neural Network (RNN) with the Long Short-Term Memory (LSTM) architecture. The LSTM model can be trained on the recipe directions from the dataset and then used to generate new recipe directions.

  • We have used recipes ingredients as dataset for LSTM

  • Tokenization is applied on dataset

  • We have trained Deep Neural Network to create system that will generate directions for recipe by giving starting point.

image.png

In [231]:
# Preprocess the recipe directions
directions = df['directions'].head(1000).values.tolist()

# Text preprocessing
tokenizer = Tokenizer()
tokenizer.fit_on_texts(directions)
total_words = len(tokenizer.word_index) + 1

input_sequences = []
for line in directions:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# Pad sequences for input to the model
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

# Create predictors and label
predictors, label = input_sequences[:, :-1], input_sequences[:, -1]

# Convert label to one-hot encoding
label = tensorflow.keras.utils.to_categorical(label, num_classes=total_words)

# Build the model
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(LSTM(150))
model.add(Dense(total_words, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

# Train the model
model.fit(predictors, label, epochs=100, verbose=2)
Epoch 1/100
128/128 - 10s - loss: 8.4358 - 10s/epoch - 82ms/step
Epoch 2/100
128/128 - 7s - loss: 8.4406 - 7s/epoch - 52ms/step
Epoch 3/100
128/128 - 7s - loss: 8.3241 - 7s/epoch - 52ms/step
Epoch 4/100
128/128 - 7s - loss: 8.1545 - 7s/epoch - 51ms/step
Epoch 5/100
128/128 - 7s - loss: 7.8082 - 7s/epoch - 52ms/step
Epoch 6/100
128/128 - 7s - loss: 7.3336 - 7s/epoch - 52ms/step
Epoch 7/100
128/128 - 7s - loss: 6.7790 - 7s/epoch - 51ms/step
Epoch 8/100
128/128 - 7s - loss: 6.1823 - 7s/epoch - 52ms/step
Epoch 9/100
128/128 - 7s - loss: 5.5900 - 7s/epoch - 52ms/step
Epoch 10/100
128/128 - 7s - loss: 5.0163 - 7s/epoch - 51ms/step
Epoch 11/100
128/128 - 7s - loss: 4.4567 - 7s/epoch - 52ms/step
Epoch 12/100
128/128 - 7s - loss: 3.9323 - 7s/epoch - 52ms/step
Epoch 13/100
128/128 - 7s - loss: 3.4422 - 7s/epoch - 51ms/step
Epoch 14/100
128/128 - 7s - loss: 2.9918 - 7s/epoch - 52ms/step
Epoch 15/100
128/128 - 7s - loss: 2.5750 - 7s/epoch - 52ms/step
Epoch 16/100
128/128 - 7s - loss: 2.1964 - 7s/epoch - 52ms/step
Epoch 17/100
128/128 - 7s - loss: 1.8682 - 7s/epoch - 52ms/step
Epoch 18/100
128/128 - 7s - loss: 1.5784 - 7s/epoch - 52ms/step
Epoch 19/100
128/128 - 7s - loss: 1.3308 - 7s/epoch - 51ms/step
Epoch 20/100
128/128 - 7s - loss: 1.1199 - 7s/epoch - 52ms/step
Epoch 21/100
128/128 - 7s - loss: 0.9449 - 7s/epoch - 51ms/step
Epoch 22/100
128/128 - 7s - loss: 0.7964 - 7s/epoch - 52ms/step
Epoch 23/100
128/128 - 7s - loss: 0.6743 - 7s/epoch - 51ms/step
Epoch 24/100
128/128 - 7s - loss: 0.5716 - 7s/epoch - 51ms/step
Epoch 25/100
128/128 - 7s - loss: 0.4872 - 7s/epoch - 52ms/step
Epoch 26/100
128/128 - 7s - loss: 0.4164 - 7s/epoch - 52ms/step
Epoch 27/100
128/128 - 7s - loss: 0.3584 - 7s/epoch - 53ms/step
Epoch 28/100
128/128 - 7s - loss: 0.3133 - 7s/epoch - 51ms/step
Epoch 29/100
128/128 - 7s - loss: 0.2754 - 7s/epoch - 52ms/step
Epoch 30/100
128/128 - 7s - loss: 0.2451 - 7s/epoch - 51ms/step
Epoch 31/100
128/128 - 7s - loss: 0.2176 - 7s/epoch - 52ms/step
Epoch 32/100
128/128 - 7s - loss: 0.1961 - 7s/epoch - 52ms/step
Epoch 33/100
128/128 - 7s - loss: 0.1791 - 7s/epoch - 52ms/step
Epoch 34/100
128/128 - 7s - loss: 0.1647 - 7s/epoch - 52ms/step
Epoch 35/100
128/128 - 7s - loss: 0.1518 - 7s/epoch - 52ms/step
Epoch 36/100
128/128 - 7s - loss: 0.1405 - 7s/epoch - 52ms/step
Epoch 37/100
128/128 - 7s - loss: 0.1311 - 7s/epoch - 51ms/step
Epoch 38/100
128/128 - 7s - loss: 0.1236 - 7s/epoch - 52ms/step
Epoch 39/100
128/128 - 7s - loss: 0.1162 - 7s/epoch - 52ms/step
Epoch 40/100
128/128 - 7s - loss: 0.1099 - 7s/epoch - 52ms/step
Epoch 41/100
128/128 - 7s - loss: 0.1048 - 7s/epoch - 52ms/step
Epoch 42/100
128/128 - 7s - loss: 0.0988 - 7s/epoch - 52ms/step
Epoch 43/100
128/128 - 7s - loss: 0.0960 - 7s/epoch - 52ms/step
Epoch 44/100
128/128 - 7s - loss: 0.0917 - 7s/epoch - 52ms/step
Epoch 45/100
128/128 - 7s - loss: 0.0877 - 7s/epoch - 52ms/step
Epoch 46/100
128/128 - 7s - loss: 0.0846 - 7s/epoch - 52ms/step
Epoch 47/100
128/128 - 7s - loss: 0.0815 - 7s/epoch - 52ms/step
Epoch 48/100
128/128 - 7s - loss: 0.0786 - 7s/epoch - 51ms/step
Epoch 49/100
128/128 - 7s - loss: 0.0765 - 7s/epoch - 52ms/step
Epoch 50/100
128/128 - 7s - loss: 0.0754 - 7s/epoch - 52ms/step
Epoch 51/100
128/128 - 7s - loss: 0.0728 - 7s/epoch - 51ms/step
Epoch 52/100
128/128 - 7s - loss: 0.0710 - 7s/epoch - 52ms/step
Epoch 53/100
128/128 - 7s - loss: 0.0691 - 7s/epoch - 52ms/step
Epoch 54/100
128/128 - 7s - loss: 0.0687 - 7s/epoch - 51ms/step
Epoch 55/100
128/128 - 7s - loss: 0.0669 - 7s/epoch - 52ms/step
Epoch 56/100
128/128 - 7s - loss: 0.0649 - 7s/epoch - 52ms/step
Epoch 57/100
128/128 - 7s - loss: 0.0634 - 7s/epoch - 52ms/step
Epoch 58/100
128/128 - 7s - loss: 0.0625 - 7s/epoch - 52ms/step
Epoch 59/100
128/128 - 7s - loss: 0.0618 - 7s/epoch - 52ms/step
Epoch 60/100
128/128 - 7s - loss: 0.0615 - 7s/epoch - 51ms/step
Epoch 61/100
128/128 - 7s - loss: 0.0605 - 7s/epoch - 52ms/step
Epoch 62/100
128/128 - 7s - loss: 0.0587 - 7s/epoch - 52ms/step
Epoch 63/100
128/128 - 7s - loss: 0.0582 - 7s/epoch - 52ms/step
Epoch 64/100
128/128 - 7s - loss: 0.0607 - 7s/epoch - 52ms/step
Epoch 65/100
128/128 - 7s - loss: 0.0587 - 7s/epoch - 52ms/step
Epoch 66/100
128/128 - 7s - loss: 0.0574 - 7s/epoch - 52ms/step
Epoch 67/100
128/128 - 7s - loss: 0.0582 - 7s/epoch - 52ms/step
Epoch 68/100
128/128 - 7s - loss: 0.0599 - 7s/epoch - 52ms/step
Epoch 69/100
128/128 - 7s - loss: 0.0584 - 7s/epoch - 52ms/step
Epoch 70/100
128/128 - 7s - loss: 0.0563 - 7s/epoch - 52ms/step
Epoch 71/100
128/128 - 7s - loss: 0.0552 - 7s/epoch - 52ms/step
Epoch 72/100
128/128 - 7s - loss: 0.0537 - 7s/epoch - 52ms/step
Epoch 73/100
128/128 - 7s - loss: 0.0538 - 7s/epoch - 51ms/step
Epoch 74/100
128/128 - 7s - loss: 0.0530 - 7s/epoch - 52ms/step
Epoch 75/100
128/128 - 7s - loss: 0.0534 - 7s/epoch - 52ms/step
Epoch 76/100
128/128 - 7s - loss: 0.0526 - 7s/epoch - 52ms/step
Epoch 77/100
128/128 - 7s - loss: 0.0521 - 7s/epoch - 52ms/step
Epoch 78/100
128/128 - 7s - loss: 0.0524 - 7s/epoch - 52ms/step
Epoch 79/100
128/128 - 7s - loss: 0.0521 - 7s/epoch - 53ms/step
Epoch 80/100
128/128 - 7s - loss: 0.0512 - 7s/epoch - 55ms/step
Epoch 81/100
128/128 - 7s - loss: 0.0510 - 7s/epoch - 52ms/step
Epoch 82/100
128/128 - 7s - loss: 0.0510 - 7s/epoch - 52ms/step
Epoch 83/100
128/128 - 7s - loss: 0.0506 - 7s/epoch - 52ms/step
Epoch 84/100
128/128 - 7s - loss: 0.0499 - 7s/epoch - 52ms/step
Epoch 85/100
128/128 - 7s - loss: 0.0500 - 7s/epoch - 52ms/step
Epoch 86/100
128/128 - 7s - loss: 0.0506 - 7s/epoch - 52ms/step
Epoch 87/100
128/128 - 7s - loss: 0.0495 - 7s/epoch - 52ms/step
Epoch 88/100
128/128 - 7s - loss: 0.0507 - 7s/epoch - 52ms/step
Epoch 89/100
128/128 - 7s - loss: 0.0531 - 7s/epoch - 52ms/step
Epoch 90/100
128/128 - 7s - loss: 0.0613 - 7s/epoch - 52ms/step
Epoch 91/100
128/128 - 7s - loss: 0.0633 - 7s/epoch - 52ms/step
Epoch 92/100
128/128 - 7s - loss: 0.0573 - 7s/epoch - 52ms/step
Epoch 93/100
128/128 - 7s - loss: 0.0513 - 7s/epoch - 52ms/step
Epoch 94/100
128/128 - 7s - loss: 0.0493 - 7s/epoch - 53ms/step
Epoch 95/100
128/128 - 7s - loss: 0.0488 - 7s/epoch - 52ms/step
Epoch 96/100
128/128 - 7s - loss: 0.0490 - 7s/epoch - 52ms/step
Epoch 97/100
128/128 - 7s - loss: 0.0480 - 7s/epoch - 52ms/step
Epoch 98/100
128/128 - 7s - loss: 0.0482 - 7s/epoch - 52ms/step
Epoch 99/100
128/128 - 7s - loss: 0.0482 - 7s/epoch - 52ms/step
Epoch 100/100
128/128 - 7s - loss: 0.0482 - 7s/epoch - 52ms/step
Out[231]:
<keras.src.callbacks.History at 0x238d7322d10>

How System Work?¶

  • If we give input In a large bowl, mix
  • Using DNN based system, it will geneate next text as
  • In a large bowl, mix peel and slice. set aside. put in casserole dish. brush with remaining barbecue sauce.
In [238]:
# Generate new recipe directions
seed_text = "In a large bowl, mix"
next_words = 3

for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len, padding='pre')  # Modify maxlen argument
    predicted = model.predict(token_list, verbose=0)  # Replace predict_classes with predict
    
    predicted_word_index = np.argmax(predicted)
    output_word = ""
    for word, index in tokenizer.word_index.items():
        if index == predicted_word_index:
            output_word = word
            break
    seed_text += " " + output_word

# Print the generated recipe directions
print("Generated Directions:", seed_text)
Generated Directions: In a large bowl, mix peel and slice. set aside. put in casserole dish. brush with remaining barbecue sauce. conventional oven:

Example 2¶

  • If we give input Get 2 tsp sugar and
  • Using DNN based system, it will geneate next text as
  • Get 2 tsp sugar and add water, frozen vegetables, potatoes and celery. turn cake out onto a heavy piece of aluminum foil which has been placed on a cookie sheet. bake in moderate oven until brown.
In [240]:
# Generate new recipe directions
seed_text = "Get 2 tsp sugar and"
next_words = 3

for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len, padding='pre')  # Modify maxlen argument
    predicted = model.predict(token_list, verbose=0)  # Replace predict_classes with predict
    
    predicted_word_index = np.argmax(predicted)
    output_word = ""
    for word, index in tokenizer.word_index.items():
        if index == predicted_word_index:
            output_word = word
            break
    seed_text += " " + output_word

# Print the generated recipe directions
print("Generated Directions:", seed_text)
Generated Directions: Get 2 tsp sugar and add water, frozen vegetables, potatoes and celery. turn cake out onto a heavy piece of aluminum foil which has been placed on a cookie sheet. bake in moderate oven until brown.

Clustering or Grouping of Recipes¶

  • One possible use case is in the domain of recipe recommendation systems. By clustering recipes based on their ingredient similarity, we can identify groups of recipes that share common ingredients. This allows us to recommend alternative recipes to users based on their preferred ingredients or dietary restrictions. For example, if a user searches for a recipe containing chicken, the system can suggest other recipes from the same cluster that also feature chicken as a main ingredient.

  • Another use case is in the analysis of recipe databases or food websites. Clustering recipes based on ingredients helps to organize and categorize a large collection of recipes. This can assist in structuring recipe repositories, making it easier for users to navigate and search for specific types of recipes. It can also help identify popular ingredient combinations and culinary trends.

  • Additionally, the clustering results can provide insights into the relationships between different types of dishes or cuisines. By examining the clusters and their corresponding recipes, we can observe patterns and associations between ingredients used in specific culinary traditions or cultural contexts.

image.png

In [60]:
# Load the recipe dataset
recipe_data = df.head(45)

# Preprocess NER column to extract ingredients

# Text vectorization
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(recipe_data['NER'].astype(str))

# Clustering
n_clusters = 3
kmeans = KMeans(n_clusters=n_clusters)
kmeans.fit(X)

# Dimensionality reduction
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X.toarray())

# Plot recipe clusters
colors = ['red', 'green', 'blue']  # Customize cluster colors
markers = ['o', 's', 'D']  # Customize cluster markers

plt.figure(figsize=(8, 6))
for i in range(n_clusters):
    cluster_points = X_pca[kmeans.labels_ == i]
    plt.scatter(cluster_points[:, 0], cluster_points[:, 1], c=colors[i], marker=markers[i], label=f'Cluster {i+1}')

plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Recipe Clusters')
plt.legend()
plt.show()

# Display recipe clusters
recipe_data['Cluster'] = kmeans.labels_

for cluster_id in range(n_clusters):
    cluster_recipes = recipe_data[recipe_data['Cluster'] == cluster_id]
    print(f"\nCluster {cluster_id+1} Recipes:")
    for _, recipe in cluster_recipes.iterrows():
        print(recipe['title'])
F:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
Cluster 1 Recipes:
Nolan'S Pepper Steak
Double Cherry Delight
Quick Barbecue Wings
Pink Stuff(Frozen Dessert)  
Fresh Strawberry Pie
Cuddy Farms Marinated Turkey
Spaghetti Sauce To Can
Prize-Winning Meat Loaf
Corral Barbecued Beef Steak Strips
One Hour Rolls
Punch Bowl Fruit Salad
Summer Chicken
Tuna Macaroni Casserole
Artichoke Dip
Summer Spaghetti
Watermelon Rind Pickles
Angel Biscuits
Quick Peppermint Puffs

Cluster 2 Recipes:
No-Bake Nut Cookies
Reeses Cups(Candy)  
Rhubarb Coffee Cake
Millionaire Pie
Buckeye Candy
Easy German Chocolate Cake
Broccoli Salad
Eggless Milkless Applesauce Cake
Grandma Hanrath'S Banana Breadfort Collins, Colorado  
Easy Fudge
Cherry Pizza
Chicken Stew
Quick Coffee Cake(6 Servings)  
Fruit Pizza

Cluster 3 Recipes:
Jewell Ball'S Chicken
Creamy Corn
Chicken Funny
Cheeseburger Potato Soup
Scalloped Corn
Taco Salad Chip Dip
Strawberry Whatever
Chocolate Frango Mints
Smothered Round Steak(Servings: 4)  
Taco-Filled Green Pepper
Potato And Cheese Pie
Broccoli Dip For Crackers
Pear-Lime Salad
C:\Users\Admin\AppData\Local\Temp\ipykernel_11224\1220178402.py:41: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recipe_data['Cluster'] = kmeans.labels_

image.png

In [ ]: